<?php
// Fetching and scanning a remote page can take a while, so lift PHP's
// execution time limit for this script.
set_time_limit(0);

// Page whose anchor tags are scanned for linked domains.
$url	= "http://www.hao123.com/";
print_r(geturl($url));

/**
 * Fetch a page and list the distinct domains its anchor tags link to.
 *
 * Every double-quoted href found in an <a>...</a> element is passed to
 * get_domain(); values for which get_domain() returns a falsy result are
 * dropped.
 *
 * @param string $url Location handed to file_get_contents() (URL or file path).
 * @return array Distinct domains. Empty when the page cannot be read or has
 *               no usable links. Keys may have gaps because array_unique()
 *               preserves the keys of the first occurrence.
 */
function geturl($url){
	$code = file_get_contents($url);
	if($code === false){
		// The fetch failed (PHP has already raised a warning); report
		// "no domains" instead of scanning a non-string.
		return array();
	}

	// Group 1 captures the href value; the remaining groups are not used.
	// preg_match_all() yields 0 for "no anchors" and false for a PCRE error.
	if(!preg_match_all("/<a[^>]*?\s*href=\"(.*?)\"(.*?)>(.*?)<\/a>/i",$code,$infos)){
		return array();
	}

	// Start from an empty list so a page without usable links still
	// produces an array rather than an undefined variable.
	$urlarr = array();
	foreach(array_unique($infos[1]) as $v){
		$domain	= get_domain($v);
		if($domain){
			$urlarr[]	= $domain;
		}
	}
	// Different links frequently resolve to the same domain.
	return array_unique($urlarr);
}

/**
 * Reduce a URL (or bare host string) to its registrable-looking domain.
 *
 * Resolution order:
 *  1. If the host ends in one of the known suffixes (com, net, org, gov, cc,
 *     biz, info, cn, optionally followed by .cn/.hk), return the label in
 *     front of the suffix plus the suffix, e.g. "sina.com.cn".
 *  2. If the host is a dotted-quad IPv4 address, return it unchanged.
 *  3. Otherwise return the last two labels of the host, or the last three
 *     when the second-to-last label is com/net/org/3322 (e.g. "x.com.au").
 *
 * When the input has no parseable host (scheme-less "example.com/path",
 * "mailto:" links, ...) step 1 is applied to the raw string instead.
 *
 * @param string $url URL or host-like string.
 * @return string|false The domain, or false when none can be determined.
 */
function get_domain($url){
	// Work on the host component when there is one, so look-alike text in the
	// path or query string (e.g. "/files/setup.com") is not taken for the domain.
	$rs = parse_url($url);
	$main_url = (is_array($rs) && isset($rs["host"])) ? $rs["host"] : "";
	$subject = ($main_url !== "") ? $main_url : $url;

	// The trailing negative lookahead forces the suffix to end the name:
	// without it "www.cnn.com" matched as "www.cn" and "news.cn.yahoo.com"
	// as "news.cn".
	$pattern = "/[\w-]+\.(com|net|org|gov|cc|biz|info|cn)(\.(cn|hk))*(?![\w.-])/";
	if(preg_match($pattern, $subject, $matches)) {
		return $matches[0];
	}

	if($main_url === ""){
		return false;
	}

	// A host that survives the ip2long()/long2ip() round trip unchanged is a
	// canonical IPv4 address and is returned as-is.
	$long = ip2long($main_url);
	if($long !== false && long2ip($long) === $main_url) {
		return $main_url;
	}

	$arr = explode(".",$main_url);
	$count = count($arr);
	if($count < 3){
		// "localhost" or "example.de": nothing to strip, and indexing
		// $count-2 / $count-3 would read past the start of the array.
		return $main_url;
	}

	$endArr = array("com","net","org","3322");// second-level suffixes such as com.cn, net.cn
	if(in_array($arr[$count-2],$endArr,true)){
		return $arr[$count-3].".".$arr[$count-2].".".$arr[$count-1];
	}
	return $arr[$count-2].".".$arr[$count-1];
}

?>